# Exploratory session: finish building the Golder panel, merge it and the
# Keefer (DPI) indicators into the working dataset, then inspect the
# country-years of ccodewb "THA" / ccode 800 for duplicates.
# NOTE(review): `spgold` must already exist in the workspace; it is not
# created in this part of the script.
gold <- do.call(rbind, spgold)
gold <- rename(gold, c(legislative_type = "gold_legislativetype"))
gold <- rename(gold, c(regime = "gold_regime"))
gold <- gold[, c("ccode", "year", "gold_legislativetype", "gold_regime")]
#Keefer files
keef <- read.dta("~/Downloads/DPI2012.dta")
keef <- keef[, c("ifs", "system", "year", "execrlc")]
# -999 is recoded to missing in both indicators
keef$system[keef$system == -999] <- NA
keef$execrlc[keef$execrlc == -999] <- NA
keef <- keef[!keef$ifs == "0", ]
# drop rows where both indicators are missing
keef <- keef[!(is.na(keef$system) & is.na(keef$execrlc)), ]
keef <- rename(keef, c(system = "keef_system", execrlc = "keef_execrlc"))
#Working dataset
work <- read.dta("~/Downloads/BW_pwt71_07222015.dta")
work <- merge(work, keef, by.x = c("ccodewb", "year"), by.y = c("ifs", "year"), all.x = TRUE)
work <- merge(work, gold, by.x = c("cown", "year"), by.y = c("ccode", "year"), all.x = TRUE)
write.dta(work, "/Volumes/TINY CRYPT/papers/Working Projects/Pre-Fall 2012/dbab_project/DBAB 2014 Updates/BW_pwt71_01222017.dta")
write.dta(work, "~/Downloads/BW_pwt71_01222017.dta")
# Rows per country after the merge: countries with too many rows were
# duplicated by a non-unique merge key
table(work$ccodewb)
sort(table(work$ccodewb))
work[work$ccodewb == "THA", c("ccodewb", "year")]
# keef is keyed by `ifs` (it has no ccodewb or cown column), so filter on that
keef[keef$ifs == "THA", c("ifs", "year")]
work[work$ccodewb == "THA", c("ccodewb", "year", "cown")]
# Reload the unmerged working data to compare against the merged version
work <- read.dta("~/Downloads/BW_pwt71_07222015.dta")
work[work$ccodewb == "THA", c("ccodewb", "year", "cown")]
# work$year is a vector: it takes a single subscript
work$year[work$ccodewb == "THA"]
sort(work$year[work$ccodewb == "THA"])
length(work$year[work$ccodewb == "THA"])
keef$year[keef$ifs == "THA"]
sort(keef$year[keef$ifs == "THA"])
# Look for repeated years in the Golder rows of ccode 800
sort(gold$year[gold$ccode == 800])
gold[gold$ccode == 800, ]
unique(gold[gold$ccode == 800, ])
dim(gold[gold$ccode == 800, ])
dim(unique(gold[gold$ccode == 800, ]))
# Re-read the raw Golder file (legislative elections only) to see the source rows
gold <- read.csv("~/Downloads/es_data-v2_0_1/es_data-v2_0_1.csv", stringsAsFactors = FALSE)
gold <- gold[, c("ccode", "year", "legislative_type", "regime", "elec_id")]
gold <- gold[grep("^L", gold$elec_id), ]
gold[gold$ccode == 800, ]
duplicated(gold$year[gold$ccode == 800])
library(foreign)
library(zoo)
library(countrycode)
library(plyr)
library(countrycode)
#' Fill the gap years in one country's election panel and interpolate over them.
#'
#' The original guard `length(missyears==0)` was a misplaced parenthesis: it is
#' just `length(missyears)`, so same-year duplicates were only dropped and NAs
#' only interpolated when at least one gap year existed. De-duplication and
#' interpolation now always run; only the creation of filler rows is guarded.
#'
#' @param x data.frame for a single country with columns ccode, year,
#'   legislative_type, regime and elec_id.
#' @return `x` unchanged when it has at most one row. Otherwise one row per
#'   year between the first and last observed year, ordered by year, with
#'   legislative_type and regime linearly interpolated and rounded. Filler rows
#'   carry elec_id "none".
fillna <- function(x) {
  if (nrow(x) <= 1) {
    return(x)
  }
  # sometimes two elections in the same year: keep the first one
  x <- x[!duplicated(x$year), ]
  # create missing years
  allyears <- min(x$year, na.rm = TRUE):max(x$year, na.rm = TRUE)
  missyears <- setdiff(allyears, x$year)
  if (length(missyears) > 0) {
    newdf <- data.frame(ccode = unique(x$ccode), year = missyears,
                        legislative_type = NA, regime = NA, elec_id = "none",
                        stringsAsFactors = FALSE)
    x <- rbind(x, newdf)
  }
  # interpolate; rows are consecutive years once the gaps are filled
  x <- x[order(x$year), ]
  x$legislative_type <- round(na.approx(x$legislative_type, na.rm = FALSE))
  x$regime <- round(na.approx(x$regime, na.rm = FALSE))
  x
}
# Golder files: keep the legislative elections (elec_id starting with "L")
gold <- read.csv("~/Downloads/es_data-v2_0_1/es_data-v2_0_1.csv", stringsAsFactors = FALSE)
gold <- gold[, c("ccode", "year", "legislative_type", "regime", "elec_id")]
gold <- gold[grep("^L", gold$elec_id), ]
# -99 in legislative_type becomes missing
gold$legislative_type <- ifelse(gold$legislative_type == -99, NA, gold$legislative_type)
# One data frame per country, gap-filled by fillna(), then stacked again
spgold <- split(gold, as.factor(gold$ccode))
spgold <- lapply(spgold, fillna)
gold <- do.call(rbind, spgold)
gold <- rename(
  gold,
  c(legislative_type = "gold_legislativetype", regime = "gold_regime")
)
gold <- gold[, c("ccode", "year", "gold_legislativetype", "gold_regime")]

# Keefer files: -999 becomes missing; drop ifs "0" and rows with no indicator
keef <- read.dta("~/Downloads/DPI2012.dta")
keef <- keef[, c("ifs", "system", "year", "execrlc")]
keef$system <- replace(keef$system, keef$system == -999, NA)
keef$execrlc <- replace(keef$execrlc, keef$execrlc == -999, NA)
keef <- keef[keef$ifs != "0", ]
keef <- keef[!is.na(keef$system) | !is.na(keef$execrlc), ]
keef <- rename(keef, c(system = "keef_system", execrlc = "keef_execrlc"))

# Working dataset: left-join both sources onto it
work <- read.dta("~/Downloads/BW_pwt71_07222015.dta")
work <- merge(
  work, keef,
  by.x = c("ccodewb", "year"), by.y = c("ifs", "year"), all.x = TRUE
)
work <- merge(
  work, gold,
  by.x = c("cown", "year"), by.y = c("ccode", "year"), all.x = TRUE
)
# NOTE(review): no column named exactly `ccode` is created above; `$` may be
# partial-matching another column here - confirm which one is intended.
sort(table(work$ccode))
# Discard the merge and start again from the raw working file
work <- read.dta("~/Downloads/BW_pwt71_07222015.dta")
library(foreign)
library(zoo)
library(countrycode)
library(plyr)
library(countrycode)
#' Fill the gap years in one country's election panel and interpolate over them.
#'
#' The original guard `length(missyears==0)` was a misplaced parenthesis: it is
#' just `length(missyears)`, so same-year duplicates were only dropped and NAs
#' only interpolated when at least one gap year existed. De-duplication and
#' interpolation now always run; only the creation of filler rows is guarded.
#'
#' @param x data.frame for a single country with columns ccode, year,
#'   legislative_type, regime and elec_id.
#' @return `x` unchanged when it has at most one row. Otherwise one row per
#'   year between the first and last observed year, ordered by year, with
#'   legislative_type and regime linearly interpolated and rounded. Filler rows
#'   carry elec_id "none".
fillna <- function(x) {
  if (nrow(x) <= 1) {
    return(x)
  }
  # sometimes two elections in the same year: keep the first one
  x <- x[!duplicated(x$year), ]
  # create missing years
  allyears <- min(x$year, na.rm = TRUE):max(x$year, na.rm = TRUE)
  missyears <- setdiff(allyears, x$year)
  if (length(missyears) > 0) {
    newdf <- data.frame(ccode = unique(x$ccode), year = missyears,
                        legislative_type = NA, regime = NA, elec_id = "none",
                        stringsAsFactors = FALSE)
    x <- rbind(x, newdf)
  }
  # interpolate; rows are consecutive years once the gaps are filled
  x <- x[order(x$year), ]
  x$legislative_type <- round(na.approx(x$legislative_type, na.rm = FALSE))
  x$regime <- round(na.approx(x$regime, na.rm = FALSE))
  x
}
# Golder files: keep the legislative elections (elec_id starting with "L")
gold <- read.csv("~/Downloads/es_data-v2_0_1/es_data-v2_0_1.csv", stringsAsFactors = FALSE)
gold <- gold[, c("ccode", "year", "legislative_type", "regime", "elec_id")]
gold <- gold[grep("^L", gold$elec_id), ]
# -99 in legislative_type becomes missing
gold$legislative_type <- ifelse(gold$legislative_type == -99, NA, gold$legislative_type)
# One data frame per country, gap-filled by fillna(), then stacked again
spgold <- split(gold, as.factor(gold$ccode))
spgold <- lapply(spgold, fillna)
gold <- do.call(rbind, spgold)
gold <- rename(
  gold,
  c(legislative_type = "gold_legislativetype", regime = "gold_regime")
)
gold <- gold[, c("ccode", "year", "gold_legislativetype", "gold_regime")]

# Keefer files: -999 becomes missing; drop ifs "0" and rows with no indicator
keef <- read.dta("~/Downloads/DPI2012.dta")
keef <- keef[, c("ifs", "system", "year", "execrlc")]
keef$system <- replace(keef$system, keef$system == -999, NA)
keef$execrlc <- replace(keef$execrlc, keef$execrlc == -999, NA)
keef <- keef[keef$ifs != "0", ]
keef <- keef[!is.na(keef$system) | !is.na(keef$execrlc), ]
keef <- rename(keef, c(system = "keef_system", execrlc = "keef_execrlc"))

# Working dataset: left-join both sources onto it, then export two copies
work <- read.dta("~/Downloads/BW_pwt71_07222015.dta")
work <- merge(
  work, keef,
  by.x = c("ccodewb", "year"), by.y = c("ifs", "year"), all.x = TRUE
)
work <- merge(
  work, gold,
  by.x = c("cown", "year"), by.y = c("ccode", "year"), all.x = TRUE
)
write.dta(work, "/Volumes/TINY CRYPT/papers/Working Projects/Pre-Fall 2012/dbab_project/DBAB 2014 Updates/BW_pwt71_01222017.dta")
write.dta(work, "~/Downloads/BW_pwt71_01222017.dta")
# Twitter OAuth setup for streamR.
#
# This replaces a run of repeated, failing handshake attempts with one clean
# sequence. Changes from the recorded session:
#  * install.packages() receives the package names as ONE character vector
#    (its second positional argument is the library path), the misspelled
#    package name is gone, and installation happens before library().
#  * The consumer key and secret are read from environment variables instead
#    of being hard-coded, and surrounding whitespace is trimmed: the pasted
#    secrets started with a space, which makes the signature invalid.
#    NOTE(review): every key/secret pair that was committed in this file
#    should be treated as leaked and regenerated.
#  * Pasted console output ("Error: Authorization Required") and calls on
#    objects that are never created (`credentials`, `testURL`) are removed.
twitter_pkgs <- c("streamR", "RCurl", "ROAuth", "RJSONIO")
missing_pkgs <- twitter_pkgs[
  !vapply(twitter_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(streamR)
library(RCurl)
library(RJSONIO)
library(stringr)
library(ROAuth)

requestURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"
# From dev.twitter.com; set these in ~/.Renviron, never in the script
consumerKey <- trimws(Sys.getenv("TWITTER_CONSUMER_KEY"))
consumerSecret <- trimws(Sys.getenv("TWITTER_CONSUMER_SECRET"))
if (!nzchar(consumerKey) || !nzchar(consumerSecret)) {
  stop("Set TWITTER_CONSUMER_KEY and TWITTER_CONSUMER_SECRET before running.",
       call. = FALSE)
}
my_oauth <- OAuthFactory$new(consumerKey = consumerKey,
                             consumerSecret = consumerSecret,
                             requestURL = requestURL,
                             accessURL = accessURL,
                             authURL = authURL)
# Interactive step: opens the authorisation URL and waits for the PIN
my_oauth$handshake(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))
### STOP HERE!!! ###
my_oauth$handshakeComplete
# PART 2: Save the my_oauth data to an .Rdata file
# (relative path: resolved against the current working directory)
save(my_oauth, file = "Desktop/my_oauth.Rdata")
# Co-voting multilevel models (lme4) with crossed random intercepts for the
# two legislators in each dyad (ego, alter).
#
# Cleaned from the recorded session:
#  * packages are attached before the first call that needs them;
#  * failed or superseded attempts are removed (effects(fit1), the
#    `Alleffects` typo, the `vnetsoc14` formula that was re-run with the
#    `vnetsoc2014` names, the first `fit3` that was re-run after loading the
#    Clean Data file, and `fit`, which is never assigned in this script);
#  * the generic vcov() is called instead of the method vcov.merMod().
library(effects)
library(lme4)
library(corrplot)
library(arm)
library(reshape)
library(stargazer)

#### RANDOMIZATION??
# FIRST MODEL WILL BE THE COMMITTEE MODEL:
dat <- read.csv("/Volumes/TINY CRYPT/papers/Working Projects/Dissertation/Main Work Folder/covoting_2014_clean04012016.csv")
fit1 <- lmer(co.vote~netcom13+netsoc13+netsoc14*netcom14+same.party+same.state+same.coalition+(1|ego)+(1|alter), data=dat)
stargazer(fit1)
allEffects(fit1)
fit1
summary(fit1)

# Lagged-outcome specifications on the Clean Data version of the file
dat <- read.csv("/Volumes/TINY CRYPT/papers/Working Projects/Dissertation/Main Work Folder/Clean Data/covoting_2014_clean02092016.csv")
fit3 <- lmer(co.vote~co.vote.lag+netsoc13+netcom13+netsoc14*netcom14+(1|ego)+(1|alter), data=dat)
summary(fit3)
fit1 <- lmer(co.vote~co.vote.lag+netsoc14*netcom14+(1|ego)+(1|alter), data=dat)
summary(fit1)
fit2 <- lmer(co.vote~co.vote.lag+netsoc13*netcom13+(1|ego)+(1|alter), data=dat)
summary(fit2)
names(dat)
# Same models using the vnet* 2014 columns
fit1 <- lmer(co.vote~co.vote.lag+vnetsoc2014*vnetcom2014+(1|ego)+(1|alter), data=dat)
summary(fit1)
fit4 <- lmer(co.vote~co.vote.lag+netsoc13+netcom13+vnetsoc2014+vnetcom2014+(1|ego)+(1|alter), data=dat)
summary(fit4)
stargazer(fit4)

#Model with differences in dif
fit1 <- lmer(co.vote~co.vote.lag+netsoc14*netcom14+(1|ego)+(1|alter), data=dat)
fit2 <- lmer(co.vote~co.vote.lag+netsoc13*netcom13+(1|ego)+(1|alter), data=dat)
fit3 <- lmer(co.vote~co.vote.lag+netsoc13+netcom13+netsoc14*netcom14+(1|ego)+(1|alter), data=dat)
fit4 <- lmer(co.vote~co.vote.lag+netsoc13+netcom13+netsoc14+netcom14+(1|ego)+(1|alter), data=dat)
#Placebo test: regress the lagged outcome on the 2013-to-2014 change in netcom
dat$placebo <- dat$netcom14-dat$netcom13
fplac <- lmer(co.vote.lag~placebo+diff.mandates+same.state+(1|ego)+(1|alter), data=dat)

# Coefficient plot of the fixed effects of fit4, intercept excluded
mod <- fit4
coefplot(fixef(mod)[-1], sds=sqrt(diag(vcov(mod)))[-1], varnames=names(fixef(mod))[-1], main="Results: Covoting ML Model, DID")

# Fitted values at the four 0/1 combinations of netsoc14 and netcom14
eff <- effect("netsoc14*netcom14", fit1, xlevels=list(netsoc14=c(0,1), netcom14=c(0,1)))
exp(eff$fit)[3]/exp(eff$fit)[1] #relative increases for comm networks versus none
exp(eff$fit)[2]/exp(eff$fit)[1] #relative increases for social networks versus none
exp(eff$fit)[4]/exp(eff$fit)[1] #relative increases for overlapping networks versus none
exp(eff$fit)[4]
eff
summary(fit1)
# scratch arithmetic kept from the session
50*.18
41

# Cross-tabulate tie changes between 2013 and 2014 on a fresh copy of the data
dat <- read.csv("/Volumes/TINY CRYPT/papers/Working Projects/Dissertation/Main Work Folder/Clean Data/covoting_2014_clean02092016.csv")
names(dat)
table(dat$netcom13, dat$netcom14)
table(dat$netsoc13, dat$netsoc14)
table(dat$netcom14, dat$netsoc14)
table(dat$netsoc13==1, dat$netsoc14==0)
table(dat$netcom13==1, dat$netcom14==0)
table(dat$netcom13==0, dat$netcom14==1)
table(dat$netsoc13==0, dat$netsoc14==1)
493+128
# Packages for the global polling replication analysis
library(countrycode)
library(lubridate)
library(ggplot2)
library(reshape)
library(caret)
library(lme4)
library(RColorBrewer)
library(hydroGOF)
# The relative file names below are read from the replication folder
setwd("~/Downloads/Science Replication and Data - Polling/")
dat <- read.csv("global_polling_replication_data.csv")
unique(dat$Country)
###########################
# Essential Feature Creation
#######################
# Poll weight from days.to.elec, rescaled without centring
dat$wts <- scale(1 / (dat$days.to.elec + 2), center = FALSE)
dat$pro.incumbent <- scale(dat$pro.incumbent, center = FALSE)
# small constant keeps log() finite when the margin is exactly zero
dat$logpolls <- log(abs(dat$pollmarg) + 1e-8)
dat$sm.pollmarg <- NA
dat$Round.Date <- as.Date(as.character(dat$Round.Date), format = "%m/%d/%Y")
# get elections that are trained and tested
elections.training <- read.csv("elections_training_wround09082016.csv")
########################
#Some basic descriptives
####################
# keep polls that have both a margin and a poll date
dat <- dat[!is.na(dat$pollmarg) & !is.na(dat$Poll.Date), ]
#Aggregating: one row per election holding the median of each variable
sm.agg <- aggregate(
  cbind(pollmarg, realmarg, Round.Date, gdp.growth, year,
        region = as.numeric(as.factor(region)), incRun, l1polity2,
        wb.inflation, asp.gdp) ~ electionid,
  data = dat, FUN = median
)
sm.agg <- sm.agg[order(sm.agg$Round.Date), ]
#Summary of proportions in which incumbents and non-incumbents run and win
win.by.inc <- table(sm.agg$incRun, sm.agg$realmarg > 0)
win.by.inc / sum(!is.na(sm.agg$pollmarg))
#Rate of re-election for incumbents - 72.4%
total.inc <- sum(win.by.inc[2, ])
win.by.inc[2, ] / total.inc
#Rate of election for non-incumbents (chosen successors) - 55.68%
total.noninc <- sum(win.by.inc[1, ])
win.by.inc[1, ] / total.noninc
#Naive model of just picking the incumbent candidate - 62.3%
table(sm.agg$realmarg > 0) / nrow(sm.agg)
#Rate of election if polls say so
with(sm.agg, table(pollmarg * realmarg >= 0) / sum(!is.na(pollmarg))) #overall - 86.3%
inc.run <- sm.agg[sm.agg$incRun == 1, ]
table(inc.run$pollmarg * inc.run$realmarg >= 0) / sum(sm.agg$incRun == 1) #incumbent officeholder running
inc.notrun <- sm.agg[sm.agg$incRun == 0, ]
table(inc.notrun$pollmarg * inc.notrun$realmarg >= 0) / sum(sm.agg$incRun == 0) #non-incumbent officeholder running
# RMSE OF NAIVE Poll-Only model
test.rows <- 51:146
table(sm.agg$pollmarg[test.rows] * sm.agg$realmarg[test.rows] > 0) / sum(!is.na(sm.agg$pollmarg[test.rows])) #out of sample accuracy
postResample(sm.agg$pollmarg, sm.agg$realmarg)
postResample(sm.agg$pollmarg[test.rows], sm.agg$realmarg[test.rows])
### ANALYSIS SECTION
# Smoothing Model
trainingsize <- 50
dat$sm.pollmarg <- NA
# Step: creating the smoothed estimate for the training set
# Step: create loop creating smoothed estimate for each election in the testing data
#' Rolling smoothed poll margins from a multilevel model.
#'
#' For the i-th test election (rows of `trainingset` with trainingset == 0) the
#' smoothing model is refit on the polls of the first `trainingsize + i`
#' elections listed in `trainingset`. On the first pass fitted values are
#' stored for every election in that window; on later passes only for the
#' newly added election.
#'
#' Fixes: the `trainingset` argument is now actually used (the body used to
#' read the global `elections.training`); the loop is skipped when there are
#' no test elections; warnings are detected with a calling handler instead of
#' reading and overwriting `last.warning` in the base environment; a failed
#' fit is reported and skipped instead of crashing in predict().
#'
#' @param pdat poll-level data frame. Columns read here: days.to.elec,
#'   pollmarg, Poll.Date, electionid, plus the model variables.
#' @param trainingset data frame with columns electionid and trainingset
#'   (0 marks a test election). NOTE(review): rows are presumably in
#'   chronological order, training elections first - confirm.
#' @param trainingsize number of leading elections in the first window.
#' @param leadTime days subtracted from days.to.elec when computing weights.
#' @return `pdat` without rows missing pollmarg or Poll.Date, with `wts`
#'   recomputed and `sm.pollmarg` filled where a model could be fit.
get_preds <- function(pdat = dat, trainingset = elections.training, trainingsize = 50, leadTime = 0) {
  pdat$wts <- scale(1 / (pdat$days.to.elec - leadTime + 2), center = FALSE)
  # Remove missing polls and dates
  pdat <- pdat[!is.na(pdat$pollmarg), ]
  pdat <- pdat[!is.na(pdat$Poll.Date), ]
  if (!"sm.pollmarg" %in% names(pdat)) {
    pdat$sm.pollmarg <- NA_real_
  }
  n.test <- sum(trainingset$trainingset == 0, na.rm = TRUE)
  for (i in seq_len(n.test)) {
    # get T/F in or out of training set
    elecs <- pdat$electionid %in% trainingset$electionid[seq_len(trainingsize + i)]
    #########
    had.warning <- FALSE
    # The handler only records the warning; it is not muffled, so R still
    # reports it as usual.
    sm.mod <- withCallingHandlers(
      try(lmer(pollmarg ~ pro.incumbent + (pro.incumbent|incRun) + (1|asp.gdp.bin) + (1|electionid) + (1|ccode) + (1|Pollster) + (1|region),
               na.action = na.exclude, weights = wts, data = pdat[elecs, ],
               control = lmerControl(optimizer = "bobyqa", boundary.tol = 1e-2, check.scaleX = "silent.rescale"))),
      warning = function(w) {
        had.warning <<- TRUE
      }
    )
    #######
    if (had.warning) { # if there is a warning, then print as such
      print(paste("warning", i, "did not converge"))
    }
    if (inherits(sm.mod, "try-error")) {
      # nothing to predict from; the affected rows keep their current value
      print(paste(i, "model fit failed, no prediction stored"))
      next
    }
    #######
    if (i == 1) {
      # first round: predict the training set plus the first test election
      pdat$sm.pollmarg[elecs] <- predict(sm.mod, newdata = pdat[elecs, ])
    } else {
      # later rounds: predict just the next test election
      next.elec <- pdat$electionid %in% trainingset$electionid[trainingsize + i]
      pdat$sm.pollmarg[next.elec] <- predict(sm.mod, newdata = pdat[next.elec, ])
      print(trainingset$electionid[trainingsize + i])
    }
    print(paste(i, "successfully completed"))
  }
  pdat
}
# Smoothed poll margins with no lead time, then one row per election (medians)
pdat1 <- get_preds(
  pdat = dat, trainingset = elections.training,
  trainingsize = 50, leadTime = 0
)
sm.agg <- aggregate(
  cbind(sm.pollmarg, Round.Date, pollmarg, realmarg, year,
        region = as.numeric(as.factor(region)), incRun, incApp, incExtend,
        multiparty, polity2, l1polity2, wb.inflation, asp.gdp,
        Round = as.numeric(as.factor(Round))) ~ electionid,
  data = pdat1, FUN = median
)
# Share of elections where the smoothed margin and the real margin share a sign
with(sm.agg, sum(sm.pollmarg * realmarg > 0, na.rm = TRUE) / sum(!is.na(sm.pollmarg)))
# Expanding-window resampling: start with 50 elections, predict one ahead
trcontrol <- trainControl(
  method = "timeslice", initialWindow = 50, horizon = 1,
  fixedWindow = FALSE, savePredictions = TRUE
) # , indexOut = indexOut
#linear partial least squares with structural features only
sm.agg <- sm.agg[order(sm.agg$Round.Date), ]
library(caret)
set.seed(13243)
#' Fit the final synthetic model with expanding-window (time-slice) resampling.
#'
#' Fix: the function now uses its `data` argument. It used to ignore it and
#' always read the global `sm.agg`, so passing another data frame had no effect.
#'
#' @param data election-level data frame with columns Round.Date, realmarg,
#'   sm.pollmarg, l1polity2 and wb.inflation. Sorted by Round.Date here.
#' @param meth caret method name passed to train().
#' @param grid tuning grid passed to train() as tuneGrid.
#' @return the object returned by caret's train().
run.finalsynthetic <- function(data = sm.agg, meth = "pls", grid = expand.grid(ncomp = 2)) {
  data <- data[order(data$Round.Date), ]
  trcontrol <- trainControl(method = "timeslice", initialWindow = 50, horizon = 1,
                            fixedWindow = FALSE, savePredictions = TRUE) # , indexOut = indexOut
  #linear partial least squares
  train(realmarg ~ sm.pollmarg + l1polity2 + wb.inflation, data = data,
        trControl = trcontrol, metric = "RMSE", method = meth, tuneGrid = grid)
}
# Evaluate the synthetic model in-sample (first 50 elections) and
# out-of-sample (rows 51:146 of the date-ordered election table).
#
# Fix: ISO3 is now added to sm.agg BEFORE `outsample` is cut from it. The
# first listing of correct/incorrect elections used outsample$ISO3 before the
# column existed, so it was re-run; only the working version is kept, and an
# exactly repeated paste() call is dropped.
sm.agg <- sm.agg[order(sm.agg$Round.Date), ]
trmode <- run.finalsynthetic(data = sm.agg, meth = "pls", grid = expand.grid(ncomp = 2))
#In-sample Results
trmod.insample <- train(realmarg ~ sm.pollmarg + l1polity2 + wb.inflation, data = sm.agg[1:50, ],
                        trControl = trainControl(method = "none", savePredictions = TRUE),
                        metric = "RMSE", method = "pls", tuneGrid = expand.grid(ncomp = 2))
sum(predict(trmod.insample, newdata = sm.agg[1:50, ]) * sm.agg$realmarg[1:50] > 0) / 50 # the in-sample rate
# Note, compared to polls alone in training sample, the polls are only 80% accurate
sum(sm.agg$pollmarg[1:50] * sm.agg$realmarg[1:50] >= 0) / 50
postResample(predict(trmod.insample, newdata = sm.agg[1:50, ]), sm.agg$realmarg[1:50]) # the in-sample rate
#Out-of-sample Results
sum((trmode$pred$pred * trmode$pred$obs) >= 0) / length(trmode$pred$pred) #the out-of-sample rate
postResample(trmode$pred$pred, trmode$pred$obs) #the out-of-sample RMSE
sum((trmode$pred$pred * trmode$pred$obs) > 0) / length(trmode$pred$pred) #the out-of-sample rate
# Country code from the first three characters of electionid
sm.agg$ISO3 <- countrycode(as.numeric(substr(sm.agg$electionid, 1, 3)), origin = "cown", destination = "iso3c")
outsample <- sm.agg[order(sm.agg$Round.Date), ]
outsample <- outsample[51:146, ]
# 1 = predicted and observed margins share a sign.
# NOTE(review): assumes trmode$pred rows are in the same order as rows 51:146
# of the date-ordered table - confirm.
outsample$pred <- 1 * (trmode$pred$pred * trmode$pred$obs > 0)
paste(outsample$ISO3[outsample$pred == 1], outsample$year[outsample$pred == 1], sep = "-")
paste(outsample$ISO3[outsample$pred == 0], outsample$year[outsample$pred == 0], sep = "-")
names(sm.agg)
paste(outsample$ISO3[outsample$pred == 0], outsample$year[outsample$pred == 0], outsample$Round.Date[outsample$pred == 0], sep = "-")
paste(outsample$ISO3[outsample$pred == 0], outsample$year[outsample$pred == 0], outsample$Round[outsample$pred == 0], sep = "-")
View(sm.agg)
# Twitter OAuth handshake for streamR.
#
# Fixes: the consumer key and secret are read from environment variables
# instead of being hard-coded, and surrounding whitespace is trimmed (the
# pasted secret started with a space, which makes the signature invalid).
# The duplicate library(RJSONIO) call is dropped.
# NOTE(review): the key/secret pair that was committed here should be treated
# as leaked and regenerated.
library(RJSONIO)
library(streamR)
library(RCurl)
library(stringr)
library(ROAuth)
requestURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"
# From dev.twitter.com; set these in ~/.Renviron, never in the script
consumerKey <- trimws(Sys.getenv("TWITTER_CONSUMER_KEY"))
consumerSecret <- trimws(Sys.getenv("TWITTER_CONSUMER_SECRET"))
if (!nzchar(consumerKey) || !nzchar(consumerSecret)) {
  stop("Set TWITTER_CONSUMER_KEY and TWITTER_CONSUMER_SECRET before running.",
       call. = FALSE)
}
my_oauth <- OAuthFactory$new(consumerKey = consumerKey,
                             consumerSecret = consumerSecret,
                             requestURL = requestURL,
                             accessURL = accessURL,
                             authURL = authURL)
# Interactive step: opens the authorisation URL and waits for the PIN
my_oauth$handshake(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))
